# Installing / loading libraries ----
# Install each CRAN package if it is missing, then attach it.
# lubridate supplies dmy(), which is used when parsing the submission dates;
# readtext is listed as well because the original install fallback referred
# to it.
for (pkg in c("quanteda", "lubridate", "readtext", "tidyverse", "pdftools")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

# Use the light ggplot2 theme for every plot produced below.
theme_set(theme_light())

library(dplyr)
library(haven)
library(parameters)
library(performance)
library(see)
## here() starts at C:/Users/batzdova/Desktop/EC-Web-Scrapping-and-Text-Mining
## [1] 435
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
## PDF error: Invalid Font Weight
# Extract ids (the first 7 characters of the file name) for the docs, to match
# them with the metadata later.
# N = 435 docs
# Problem: multiple docs for the same id (= multiple docs by the same submitter)
#
# basename() drops the directory part of the path, so the id extraction does
# not depend on the absolute location of the project on one machine.
tb_pdf$Document <- basename(tb_pdf$Document)
ids <- substr(tb_pdf$Document, 1, 7)
tb_pdf$id <- ids

library(readr)
# Survey export of the public consultation: semicolon-delimited, with
# surrounding whitespace trimmed from every field.
Public_consultation_2020 <- read_delim(
  "./Data/Public_consultation_2020/files/Public_consultation_2020.csv",
  delim = ";", escape_double = FALSE, trim_ws = TRUE
)
## New names:
## Rows: 1216 Columns: 73
## -- Column specification
## -------------------------------------------------------- Delimiter: ";" chr
## (73): Reference, Feedback date, Language, User type, First name, Surname...
## i Use `spec()` to retrieve the full column specification for this data. i
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## * `Other, please specify: ` -> `Other, please specify: ...47`
## * `Other, please specify: ` -> `Other, please specify: ...49`
consult_meta <- as_tibble(Public_consultation_2020)

# Attach the PDF text to the survey metadata via the submission reference.
# All survey rows are kept (left join); rows without a matching PDF get NA
# in the PDF columns.
temp <- left_join(consult_meta, tb_pdf, by = c("Reference" = "id")) %>%
  as_tibble()

# observations without pdf text
temp %>% filter(is.na(text))

# Recoding the survey (first submission round) ----
# drop variables
# var column nr. 73: temp[,73]
temp <- temp %>%
  select(!c(`You can upload a document here:\n\n`, `Publication privacy settings`))

# renaming variables
# The long survey questions are matched by a distinctive fragment of their
# text (matches() treats the fragment as a regular expression) and replaced by
# a short name. Each fragment is expected to hit exactly one column.
temp <- temp %>%
  rename(
    filename = Document,
    country = Country,
    org = `Organisation name`,
    id = Reference,
    time = `Feedback date`,
    lang = Language,
    type = `User type`,
    firstname = `First name`,
    surname = Surname,
    scope = Scope,
    register = `Transparency register number`,
    size = `Organisation size`
  ) %>%
  rename_with(~ 'coop_member_states', matches('Working with Member states')) %>%
  rename_with(~ 'research_innov', matches('Focussing the efforts of the research and innovation community')) %>%
  rename_with(~ 'skills', matches('\n: Skills')) %>%
  rename_with(~ 'SME', matches('\n: Focus on SMEs')) %>%
  rename_with(~ 'private_sector', matches('\n: Partnership with the private sector')) %>%
  rename_with(~ 'public_sector', matches('\n: Promoting the adoption of AI by the public sector')) %>%
  rename_with(~ 'other_action', matches('other actions that should be considered?')) %>%
  rename_with(~ 'excel_research', matches('\n: Strengthen excellence in research')) %>%
  rename_with(~ 'testing_fac', matches('Establish world-reference testing facilities for AI')) %>%
  rename_with(~ 'uptake_ai', matches('Promote the uptake of AI by business and the public sector')) %>%
  rename_with(~ 'startup_finance', matches('Increase the financing for start-ups innovating in AI')) %>%
  rename_with(~ 'training_skills', matches('Develop skills for AI and adapt existing training programmes')) %>%
  rename_with(~ 'eu_data_space', matches('Build up the European data space')) %>%
  rename_with(~ 'other_area', matches('Are there other areas that that should be considered')) %>%
  rename_with(~ 'lighthouse', matches('Support the establishment of a lighthouse research centre that is world class and able to attract the best minds')) %>%
  rename_with(~ 'net_centres', matches('Network of existing AI research excellence centres')) %>%
  rename_with(~ 'partner_research', matches('Set up a public-private partnership for industrial research')) %>%
  rename_with(~ 'action_research', matches('actions to strengthen the research and innovation community that should be given a priority')) %>%
  rename_with(~ 'benefits_ai', matches('Help to raise SME’s awareness about potential benefits of AI')) %>%
  rename_with(~ 'access_testing', matches('Provide access to testing and reference facilities')) %>%
  rename_with(~ 'knowhow_transfer', matches('Promote knowledge transfer and support the development of AI expertise for SMEs')) %>%
  rename_with(~ 'partner_aiproject', matches('Support partnerships between SMEs, larger enterprises and academia around AI projects')) %>%
  rename_with(~ 'equity_finance', matches('Provide information about equity financing for AI startups')) %>%
  rename_with(~ 'tasks_innovhub', matches('important for specialised Digital Innovations Hubs')) %>%
  # 'AI may endanger safety' is renamed exactly once: a second identical call
  # would select zero columns while still returning one name.
  rename_with(~ 'concern_safety', matches('AI may endanger safety')) %>%
  rename_with(~ 'concern_rights', matches('AI may breach fundamental rights')) %>%
  rename_with(~ 'concern_discrim', matches('The use of AI may lead to discriminatory outcomes')) %>%
  rename_with(~ 'concern_explain', matches('AI may take actions for which the rationale cannot be explained')) %>%
  rename_with(~ 'concern_compensat', matches('AI may make it more difficult for persons having suffered harm to obtain compensation')) %>%
  rename_with(~ 'concern_accuracy', matches('AI is not always accurate')) %>%
  rename_with(~ 'concern_other', matches('Do you have any other concerns about AI that are not mentioned')) %>%
  rename_with(~ 'leg_rules', matches('Do you think that the concerns expressed above can be addressed by applicable EU legislation'))

# The remaining questions are renamed by column position.
names(temp)[44] <- "rules_other"
names(temp)[45] <- "rules_highrisk"
names(temp)[46] <- "mitigate_other"
names(temp)[47] <- "highrisk_approach"
names(temp)[48] <- "highrisk_other"
names(temp)[49] <- "highrisk_app"
names(temp)[50] <- "requir_qual_training_data"
names(temp)[51] <- "requir_record_data"
names(temp)[52] <- "requir_purpose"
names(temp)[53] <- "requir_robust_acc"
names(temp)[54] <- "requir_human_oversight"
names(temp)[55] <- "requir_liability"
names(temp)[56] <- "requir_biometric"
names(temp)[57] <- "requir_spec"
names(temp)[58] <- "label_aisystem"
names(temp)[59] <- "label_suggest"
names(temp)[60] <- "trust_spec"
names(temp)[61] <- "trust_enforce"
names(temp)[62] <- "compliance_spec"
names(temp)[63] <- "risk_spec"
names(temp)[64] <- "risk_reform"
# NOTE(review): column 65 used to be assigned "reform_assess" and was then
# immediately overwritten with "risk_procedure", so only the latter ever took
# effect. Check against the questionnaire whether the names from here on are
# shifted by one column.
names(temp)[65] <- "risk_procedure"
names(temp)[66] <- "risk_other"
names(temp)[67] <- "liability_reform"
names(temp)[68] <- "liabilty_further"
names(temp)[69] <- "liability_national"
names(temp)[70] <- "liabilty_app"
names(temp)[71] <- "liabilty_other"

# cooperation member states (Likert scale 1-5 (not important - very important))
# Convert the importance labels to numbers; any other value (including a
# missing answer) becomes NA because case_when() has no fallback branch.
# NOTE(review): "No opinion" is coded as 0 and therefore enters means, the
# histogram and any later model as a value below "not important at all" -
# confirm this is intended.
temp <- temp %>%
  mutate(coop_member_states = case_when(
    coop_member_states == "5 - Very important" ~ 5,
    coop_member_states == "4 - Important" ~ 4,
    coop_member_states == "3 - Neutral" ~ 3,
    coop_member_states == "2 - Not important" ~ 2,
    coop_member_states == "1 - Not important at all" ~ 1,
    coop_member_states == "No opinion" ~ 0
  ))

# summary() computes min/max/mean on the non-missing answers; sd() has to
# drop NAs as well, otherwise it returns NA as soon as one answer is missing.
c(
  summary(temp$coop_member_states)[c("Min.", "Max.", "Mean")],
  "sd" = sd(temp$coop_member_states, na.rm = TRUE)
) %>%
  round(digits = 2)

hist(temp$coop_member_states, breaks = 60)

# recoding research innovation focus (research_innov)
# Convert one importance item from its survey label to a number.
#
# x: character vector of survey answers.
# Returns a numeric vector of the same length: 5 ("Very important") down to
# 1 ("Not important at all"), 0 for "No opinion", and NA for anything else
# (including missing answers), since there is no fallback branch.
# NOTE(review): coding "No opinion" as 0 places it below "not important at
# all" in every mean and model that uses these items - confirm this is intended.
recode_likert <- function(x) {
  case_when(
    x == "5 - Very important" ~ 5,
    x == "4 - Important" ~ 4,
    x == "3 - Neutral" ~ 3,
    x == "2 - Not important" ~ 2,
    x == "1 - Not important at all" ~ 1,
    x == "No opinion" ~ 0
  )
}

# Items that share the importance scale above.
likert_items <- c(
  "research_innov",    # focus of the research and innovation community
  "skills",            # skills
  "SME",               # focus on SMEs
  "private_sector",    # partnership with the private sector
  "public_sector",     # adoption of AI by the public sector
  "excel_research",    # strengthen excellence in research
  "testing_fac",       # establish world-reference testing facilities for AI
  "uptake_ai",         # promote the uptake of AI by business and public sector
  "startup_finance",   # increase the financing for start-ups innovating in AI
  "training_skills",   # develop skills for AI, adapt training programmes
  "eu_data_space",     # build up the European data space
  "lighthouse",        # establishment of a lighthouse research centre
  "net_centres",       # network of existing AI research excellence centres
  "partner_research",  # public-private partnership for industrial research
  "benefits_ai",       # SMEs' awareness about potential benefits of AI
  "access_testing",    # access to testing and reference facilities
  "knowhow_transfer",  # knowledge transfer / AI expertise for SMEs
  "partner_aiproject", # partnerships SMEs, larger enterprises and academia
  "equity_finance"     # information about equity financing for AI startups
)

# all_of() makes a missing column an error instead of silently skipping it.
temp <- temp %>%
  mutate(across(all_of(likert_items), recode_likert))

# Combine first name and surname into a single "person" column.
tidy_df1 <- temp %>% unite("person", firstname:surname, sep = " ")
# add column indicative for first consultation round
tidy_df1 <- tidy_df1 %>%
  mutate(consult_round = "one")

# Rewrite the user-type labels of the survey export (old value = new value)
# into the spelling used for the scraped rounds further down.
tidy_df1 <- tidy_df1 %>%
  mutate(type = recode(type,
    `NGO (Non-governmental organisation)` = "Non-governmental organisation (NGO)",
    `Academic/Research Institution` = "Academic/research Institution",
    `EU Citizen` = "EU citizen",
    `Company/Business organisation` = "Company/business organisation",
    `Consumer Organisation` = "Consumer organisation",
    `Trade Union` = "Trade union",
    `Business Association` = "Business association"
  ))

# Same for the organisation-size labels (old value = new value).
tidy_df1 <- tidy_df1 %>%
  mutate(size = recode(size,
    `Medium (< 250 employees)` = "Medium (50 to 249 employees)",
    `Small (< 50 employees)` = "Small (10 to 49 employees)",
    `Micro (< 10 employees)` = "Micro (1 to 9 employees)"
  ))

# roadmap_2020 and final round commission_adoption_2021 ----
library(readr)
# Feedback table of the final round (commission adoption 2021).
commission_adoption_2021 <- read_csv(
  file = "./Augmented_data/commission_adoption_2021.csv"
)
## Rows: 304 Columns: 11
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): Feedback reference, Submitted on, Submitted by, User type, Organis...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Feedback table of the roadmap round (2020).
roadmap_2020 <- read_csv(
  file = "./Augmented_data/roadmap_2020.csv"
)
## Rows: 123 Columns: 11
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): Feedback reference, Submitted on, Submitted by, User type, Organis...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# alternative: library(janitor); roadmap_2020 %>% clean_names()
#
# Roadmap round (2020): shorten the column names, parse the submission date
# (day-month-year) and translate the German labels to the English ones.
scrap20 <- roadmap_2020 %>%
  rename(
    country = `Country of origin`,
    id = `Feedback reference`,
    time = `Submitted on`,
    person = `Submitted by`,
    type = `User type`,
    org = Organisation,
    size = `Organisation size`,
    register = `Transparency register number`,
    initiative = Initiative,
    abstract = Paragraph,
    text = pdf
  ) %>%
  mutate(time = dmy(time)) %>%
  mutate(type = recode(type, # old value = new value
    `NRO (Nichtregierungsorganisation)` = "Non-governmental organisation (NGO)",
    `Universität/Forschungseinrichtung` = "Academic/research Institution",
    `EU-Bürger/-in` = "EU citizen",
    `Sonstiges` = "Other",
    `Unternehmen/Unternehmensverband` = "Company/business organisation",
    `Verbraucherverband` = "Consumer organisation",
    `Behörde` = "Public authority",
    `Gewerkschaft` = "Trade union",
    `Wirtschaftsverband` = "Business association",
    `-` = "Missing"
  )) %>%
  mutate(size = recode(size, # old value = new value
    `mittel (50 bis 249 Beschäftigte)` = "Medium (50 to 249 employees)",
    `klein (10 bis 49 Beschäftigte)` = "Small (10 to 49 employees)",
    `groß (250 oder mehr Beschäftigte)` = "Large (250 or more)",
    `-` = "Missing",
    `sehr klein (1 bis 9 Beschäftigte)` = "Micro (1 to 9 employees)"
  )) %>%
  # Workaround: the exact-match recode above did not take effect on this data,
  # so any label that is still German is caught by a distinctive fragment.
  # "sehr" must be tested before "klein": "sehr klein" contains "klein" and
  # would otherwise be coded as Small instead of Micro.
  mutate(size = case_when(
    str_detect(size, "sehr") ~ "Micro (1 to 9 employees)",
    str_detect(size, "klein") ~ "Small (10 to 49 employees)",
    str_detect(size, "mittel") ~ "Medium (50 to 249 employees)",
    str_detect(size, "^gro") ~ "Large (250 or more)",
    TRUE ~ size
  )) %>%
  # NOTE(review): country names containing umlauts may be affected by the
  # same matching problem as the size labels - verify with count(country).
  mutate(country = recode(country,
    `Vereinigten Staaten` = "United States",
    `Belgien` = "Belgium",
    `Slowakei` = "Slovakia",
    `Italien` = "Italy",
    `Niederlande` = "Netherlands",
    `Dänemark` = "Denmark",
    `Vereinigtes Königreich` = "United Kingdom",
    `Frankreich` = "France",
    `-` = "Missing",
    `international` = "Other",
    `Spanien` = "Spain",
    `Österreich` = "Austria",
    `Schweden` = "Sweden",
    `Polen` = "Poland",
    `Irland` = "Ireland",
    `Finnland` = "Finland",
    `Deutschland` = "Germany",
    `Ungarn` = "Hungary",
    `Tschechien` = "Czech Republic",
    `Rumänien` = "Romania",
    `Bulgarien` = "Bulgaria"
  ))
# Commission adoption round (2021): shorten the column names, parse the
# submission date (day-month-year) and fold stray values into "Other" /
# "Missing".
scrap21 <- commission_adoption_2021 %>%
  rename(
    id = `Feedback reference`,
    time = `Submitted on`,
    person = `Submitted by`,
    type = `User type`,
    org = Organisation,
    size = `Organisation size`,
    register = `Transparency register number`,
    country = `Country of origin`,
    initiative = Initiative,
    abstract = Paragraph,
    text = pdf
  ) %>%
  mutate(
    time = dmy(time),
    # These two values are recoded to "Other" like any stray label;
    # "-" marks a missing entry.
    type = recode(
      type,
      `Ukyo Mori` = "Other",
      `Johannes Kröhnert` = "Other",
      `-` = "Missing"
    ),
    # Values in the country column that are recoded to "Other".
    country = recode(
      country,
      `Regional` = "Other",
      `Local` = "Other",
      `feedback.usertype.company` = "Other",
      `feedback.usertype.business_association` = "Other",
      `National` = "Other"
    ),
    size = recode(size, `-` = "Missing")
  )
scrap20 <- scrap20 %>% mutate(consult_round = "two")
scrap21 <- scrap21 %>% mutate(consult_round = "three")

# Problem with scrap20 and the ids F550611 and F550610: they are doubles
# (with empty abstract and text section); the complete entry is F550619.
# scrap20 has 123 rows but should have 133!
# After filtering: 121 rows
scrap20 <- scrap20 %>%
  filter(id != "F550611", id != "F550610")

# Problem 2: missing on all variables
scrap20 %>% filter(is.na(abstract))
scrap20 %>% filter(id == "-")
scrap20 <- scrap20 %>% filter(id != "-")

# There are n = 85 pdfs in the folder but only n = 69 [text] in the csv.
# There are n = 49 entries with only an abstract but no text:
# filter(!is.na(abstract), is.na(text))
scrap20 %>% filter(!is.na(text))

# Stack the two scraped rounds.
submission <- rbind(scrap20, scrap21)

# The data frames have different cell and column numbers;
# they share: id, time, person, type, org, size, register, country, text
# Parse the survey date (day.month.year) so that it has the same Date type as
# the scraped rounds before joining.
tidy_df1$time <- as.Date(tidy_df1$time, "%d.%m.%Y")

# Put the shared columns directly after `time`, in this order.
tidy_df1 <- tidy_df1 %>%
  relocate(person, type, org, size, register, country, text, .after = time)
submission <- submission %>% relocate(text, .after = country)

### Merge all 3 data frames together
three_submission <- full_join(
  tidy_df1, submission,
  by = c(
    "id", "time", "type", "size", "org", "register", "text",
    "consult_round", "person", "country"
  )
)
saveRDS(three_submission, "three_submission.rds")

# Linear model of the importance given to cooperation with member states,
# with an interaction between research_innov and excel_research.
reg_linear <- lm(
  coop_member_states ~ research_innov * excel_research +
    benefits_ai + public_sector + private_sector + SME + lighthouse +
    net_centres + testing_fac + startup_finance,
  data = three_submission
)
# Coefficient plot of the fitted model.
model_parameters(reg_linear) %>%
  plot()

# Normality check of the model, plotted.
check_normality(reg_linear) %>%
  plot()

model_parameters(reg_linear)

# Interaction
# library(sjPlot)
# plot_model(
#   reg_linear,
#   type = "int"
# )

# datawizard provides describe_distribution(); it has to be attached on its
# own line, not at the end of the commented-out block above.
library(datawizard)
three_submission %>%
  select(
    coop_member_states,
    skills,
    SME,
    research_innov
  ) %>%
  describe_distribution(iqr = FALSE)

library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

# Regression table for the fitted model (LaTeX output).
stargazer(reg_linear)
##
## % Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
## % Date and time: Thu, Mar 23, 2023 - 21:40:29
## \begin{table}[!htbp] \centering
## \caption{}
## \label{}
## \begin{tabular}{@{\extracolsep{5pt}}lc}
## \\[-1.8ex]\hline
## \hline \\[-1.8ex]
## & \multicolumn{1}{c}{\textit{Dependent variable:}} \\
## \cline{2-2}
## \\[-1.8ex] & coop\_member\_states \\
## \hline \\[-1.8ex]
## research\_innov & 0.502$^{***}$ \\
## & (0.058) \\
## & \\
## excel\_research & 0.446$^{***}$ \\
## & (0.059) \\
## & \\
## benefits\_ai & 0.049$^{*}$ \\
## & (0.025) \\
## & \\
## public\_sector & 0.077$^{***}$ \\
## & (0.028) \\
## & \\
## private\_sector & 0.066$^{**}$ \\
## & (0.029) \\
## & \\
## SME & 0.004 \\
## & (0.028) \\
## & \\
## lighthouse & 0.029 \\
## & (0.027) \\
## & \\
## net\_centres & 0.160$^{***}$ \\
## & (0.034) \\
## & \\
## testing\_fac & 0.010 \\
## & (0.029) \\
## & \\
## startup\_finance & $-$0.017 \\
## & (0.029) \\
## & \\
## research\_innov:excel\_research & $-$0.093$^{***}$ \\
## & (0.014) \\
## & \\
## Constant & 0.487$^{***}$ \\
## & (0.151) \\
## & \\
## \hline \\[-1.8ex]
## Observations & 1,017 \\
## R$^{2}$ & 0.441 \\
## Adjusted R$^{2}$ & 0.434 \\
## Residual Std. Error & 0.885 (df = 1005) \\
## F Statistic & 71.963$^{***}$ (df = 11; 1005) \\
## \hline
## \hline \\[-1.8ex]
## \textit{Note:} & \multicolumn{1}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\
## \end{tabular}
## \end{table}
# Mean importance ratings per country. drop_na() removes every row with a
# missing value in any of the selected columns (including skills and SME)
# before the means are taken.
three_submission %>%
  select(
    coop_member_states,
    skills,
    SME,
    research_innov,
    country
  ) %>%
  drop_na() %>%
  group_by(country) %>%
  summarize(
    research_mean = mean(research_innov),
    coop_mean = mean(coop_member_states)
  )

library(sjPlot)

# Predicted values of the outcome across research_innov.
reg_linear %>%
  plot_model(
    type = "pred",
    terms = "research_innov"
  )